Chapter 9 HMSC analysis

9.1 Load data

load("data/data.Rdata")

9.2 Compute variance partitioning

# Select modelchain of interest
load("hmsc_bookdown/fit_model1_250_10.Rdata")

varpart=computeVariancePartitioning(m)

# Compute variance partitioning
varpart=computeVariancePartitioning(m)

varpart$vals %>%
   as.data.frame() %>%
   rownames_to_column(var="variable") %>%
   pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
   mutate(variable=factor(variable, levels=c("Elevation","Tree_cover", "Anthropization_cover", "logseqdepth","Random: Sampling_point", "Random: Transect"))) %>%
   group_by(variable) %>%
   summarise(mean=mean(value)*100,sd=sd(value)*100) %>%
   tt()
tinytable_kw8zy9srfnjk2hebzomg
variable mean sd
Elevation 13.76615 10.766178
Tree_cover 8.40550 5.281159
Anthropization_cover 12.37289 6.324097
logseqdepth 21.58481 13.751217
Random: Sampling_point 27.27780 17.329095
Random: Transect 16.59284 12.592882
# Basal tree
varpart_tree <- genome_tree %>%
        keep.tip(., tip=m$spNames)

#Varpart table
varpart_table <- varpart$vals %>%
   as.data.frame() %>%
   rownames_to_column(var="variable") %>%
   pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
   mutate(variable=factor(variable, levels=rev(c("Elevation","Tree_cover","Anthropization_cover", "logseqdepth","Random: Sampling_point", "Random: Transect")))) %>%
   mutate(genome=factor(genome, levels=rev(varpart_tree$tip.label)))

#Phylums
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% varpart_tree$tip.label) %>%
    arrange(match(genome, varpart_tree$tip.label)) %>%
    mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
    column_to_rownames(var = "genome") %>%
    select(phylum)


colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% varpart_tree$tip.label) %>%
    arrange(match(genome, varpart_tree$tip.label)) %>%
     select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

# Basal ggtree
varpart_tree <- varpart_tree %>%
        force.ultrametric(.,method="extend") %>%
        ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
# Add phylum colors next to the tree tips
varpart_tree <- gheatmap(varpart_tree, phylum_colors, offset=-0.2, width=0.1, colnames=FALSE) +
   scale_fill_manual(values=colors_alphabetic)+
      labs(fill="Phylum")

#Reset fill scale to use a different colour profile in the heatmap
varpart_tree <- varpart_tree + new_scale_fill()

# Add variance stacked barplot
vertical_tree <-  varpart_tree +
        scale_fill_manual(values=c("#34738f","#cccccc","#ed8a45","#b2b530","#be3e2b","#83bb90","#f6de6c", "#122f3d"))+
        geom_fruit(
             data=varpart_table,
             geom=geom_bar,
             mapping = aes(x=value, y=genome, fill=variable, group=variable),
             pwidth = 2,
             offset = 0.05,
             width= 1,
             orientation="y",
             stat="identity",
             color = "white",
             size=0.1)+
      labs(fill="Variable")

vertical_tree

# Select desired support threshold
support=0.9
negsupport=1-support

# Basal tree
postestimates_tree <- genome_tree %>%
        keep.tip(., tip=m$spNames)

#plotBeta(hM=m, post=getPostEstimate(hM=m, parName="Beta"), param = "Support", plotTree = TRUE, covNamesNumbers=c(1,0))

# Posterior estimate table
post_beta <- getPostEstimate(hM=m, parName="Beta")$support %>%
    as.data.frame() %>%
    mutate(variable=m$covNames) %>%
    pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
    mutate(genome=factor(genome, levels=rev(postestimates_tree$tip.label))) %>%
    mutate(value = case_when(
          value >= support ~ "Positive",
          value <= negsupport ~ "Negative",
          TRUE ~ "Neutral")) %>%
    mutate(value=factor(value, levels=c("Positive","Neutral","Negative"))) %>%
    pivot_wider(names_from = variable, values_from = value) %>%
    rename(intercept=2,
             Elevation=3,
           Tree_cover=4,
           Anthropization_cover=5,
             logseqdepth=6
           ) %>%
    select(genome,intercept,Elevation,Tree_cover,Anthropization_cover,logseqdepth) %>%
    column_to_rownames(var="genome")

#Phylums
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% postestimates_tree$tip.label) %>%
    arrange(match(genome, postestimates_tree$tip.label)) %>%
    mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
    column_to_rownames(var = "genome") %>%
    select(phylum)


colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% postestimates_tree$tip.label) %>%
    arrange(match(genome, postestimates_tree$tip.label)) %>%
     select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

# Basal ggtree
postestimates_tree <- postestimates_tree %>%
        force.ultrametric(.,method="extend") %>%
        ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#Add phylum colors next to the tree tips
postestimates_tree <- gheatmap(postestimates_tree, phylum_colors, offset=-0.2, width=0.1, colnames=FALSE) +
      scale_fill_manual(values=colors_alphabetic)+
      labs(fill="Phylum")

#Reset fill scale to use a different colour profile in the heatmap
postestimates_tree <- postestimates_tree + new_scale_fill()

# Add posterior significant heatmap

postestimates_tree <- gheatmap(postestimates_tree, post_beta, offset=0, width=0.5, colnames=TRUE, colnames_position="top",colnames_angle=90, colnames_offset_y=1, hjust=0) +
        scale_fill_manual(values=c("#be3e2b","#f4f4f4","#b2b530"))+
        labs(fill="Trend")

postestimates_tree +
        vexpand(.25, 1) # expand top

#Compute the residual correlation matrix
OmegaCor = computeAssociations(m)

# Reference tree (for sorting genomes)
genome_tree_subset <- genome_tree %>%
        keep.tip(., tip=m$spNames)


#Co-occurrence matrix at the animal level
supportLevel = 0.95
toPlot = ((OmegaCor[[1]]$support>supportLevel)
          + (OmegaCor[[1]]$support<(1-supportLevel))>0)*OmegaCor[[1]]$mean

matrix <- toPlot %>%
      as.data.frame() %>%
      rownames_to_column(var="genome1") %>%
      pivot_longer(!genome1, names_to = "genome2", values_to = "cor") %>%
      mutate(genome1= factor(genome1, levels=genome_tree_subset$tip.label)) %>%
      mutate(genome2= factor(genome2, levels=genome_tree_subset$tip.label)) %>%
      ggplot(aes(x = genome1, y = genome2, fill = cor)) +
            geom_tile() +
            scale_fill_gradient2(low = "#be3e2b",
                       mid = "#f4f4f4",
                       high = "#b2b530")+
            theme_void()

vtree_top <- genome_tree_subset %>%
  force.ultrametric(.,method="extend") %>%
  ggtree(layout = "rectangular") + 
  layout_dendrogram() +  # Ensure correct layout
  coord_flip()
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
vtree_left <- genome_tree_subset %>%
  force.ultrametric(.,method="extend") %>%
  ggtree(.)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#create composite figure
grid.arrange(grobs = list(vtree_top,matrix,vtree_left),
             layout_matrix = rbind(c(4,1,1,1,1,1,1,1,1,1,1,1),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2)))

9.3 Phylogenetic signal

mpost <- convertToCodaObject(m)
quantile(unlist(mpost$Rho), probs = c(.05,.5,.95))
  5%  50%  95% 
0.99 1.00 1.00 

9.4 Elevation predictions

gradient = seq(940, 2350, by = 100)
gradientlength = length(gradient)

#Treatment-specific gradient predictions
pred_elevation <- constructGradient(m,
                      focalVariable = "Elevation",
                      non.focalVariables = list(logseqdepth=list(1),Tree_cover=list(1), Anthropization_cover=list(1)),
                      ngrid=gradientlength) %>%
                      predict(m, Gradient = ., expected = TRUE) %>%
                      do.call(rbind,.) %>%
                      as.data.frame() %>%
                      mutate(elevation=rep(gradient,1000)) %>%
                      pivot_longer(-c(elevation), names_to = "genome", values_to = "value")

9.4.1 Responses to elevation

# Select desired support threshold
support=0.9
negsupport=1-support

#Get phylum colors from the EHI standard
phylum_colors <- genome_metadata %>%
    left_join(read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv"), by=join_by(phylum == phylum)) %>%
    arrange(match(genome, genome_tree$tip.label)) %>%
    select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    #slice(2:5) %>%
    select(colors) %>%
    pull()

getPostEstimate(hM=m, parName="Beta")$support %>%
    as.data.frame() %>%
    mutate(variable=m$covNames) %>%
    pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
    mutate(trend = case_when(
          value >= support ~ "Positive",
          value <= negsupport ~ "Negative",
          TRUE ~ "Neutral")) %>%
    filter(variable=="Elevation") %>%
    select(genome,trend) %>%
    left_join(pred_elevation, by=join_by(genome==genome)) %>%
    group_by(genome, trend, elevation) %>%
    summarize(value = mean(value, na.rm = TRUE)) %>%
    left_join(genome_metadata, by=join_by(genome == genome)) %>%
    ggplot(aes(x=elevation, y=value, group=genome, color=phylum, linetype=trend)) +
        geom_line() +
        scale_linetype_manual(values=c("solid","dashed","solid")) +
        scale_color_manual(values=phylum_colors) +
        facet_grid(fct_rev(trend) ~ phylum) +
        labs(y="Genome abundance (log)",x="Elevation") +
        theme(legend.position = "none") +
        theme_minimal() +
         theme(legend.position = "none",
               axis.text.x = element_text(angle = 45, hjust = 0.8,),
               axis.line.x = element_line(size = 0.5, linetype = "solid", colour = "black"),
)

9.5 Functional predictions

9.5.1 Element level

elements_table <- genome_gifts_filt %>%
    to.elements(., GIFT_db) %>%
    as.data.frame()

community_elements <- pred_elevation %>%
  group_by(elevation, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "elevation") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(elements_table,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="elevation")
   })

calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

element_predictions <- map_dfc(community_elements, function(mat) {
      mat %>%
        column_to_rownames(var = "elevation") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_elements[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)
# Positively associated
p0<-positive_filtered<-element_predictions %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) 

if (nrow(positive_filtered) > 0) {
  positive_filtered %>%
  tt() |>
      style_tt(
        i = which(element_predictions$positive_support < 0.9 & element_predictions$negative_support < 0.1),
        background = "#E5D5B1") |>
      style_tt(
        i = which(element_predictions$negative_support > 0.9 & element_predictions$positive_support < 0.1),
        background = "#B7BCCE")
}


#Negatively associated
p1<-negative_filtered<-element_predictions %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) 

if (nrow(negative_filtered) > 0) {
  negative_filtered %>%
  tt() |>
      style_tt(
        i = which(element_predictions$positive_support < 0.9 & element_predictions$negative_support < 0.1),
        background = "#E5D5B1") |>
      style_tt(
        i = which(element_predictions$negative_support > 0.9 & element_predictions$positive_support < 0.1),
        background = "#B7BCCE")
}
# Positively associated
p0 %>%
  tt()
tinytable_yiy7gsmuvu93d9r6m6rs
trait mean p1 p9 positive_support negative_support
D0613 0.034264683 0.0113319085 0.060146258 0.971 0.029
B0106 0.017715307 0.0039875628 0.033388901 0.966 0.034
D0609 0.014654471 0.0028010131 0.027577745 0.962 0.038
D0205 0.008600377 0.0020467751 0.015207831 0.953 0.047
D0207 0.031552241 0.0069001701 0.067737031 0.949 0.051
B0802 0.037714918 0.0086104696 0.071624679 0.947 0.053
D0304 0.014938085 0.0035430830 0.027059303 0.945 0.055
B0214 0.019893750 0.0026561423 0.037824383 0.931 0.069
B0217 0.013929202 0.0015844388 0.028027942 0.925 0.075
D0817 0.004336948 0.0003403666 0.008888889 0.919 0.081
#Negatively associated
p1%>%
  tt()
tinytable_11yjz3d71yxxtn3ce8z7
trait mean p1 p9 positive_support negative_support
B0208 -0.0344477317 -0.0618098460 -1.039294e-02 0.016 0.984
S0104 -0.0209740208 -0.0337504690 -8.006082e-03 0.022 0.978
D0805 -0.0004581602 -0.0009344457 -6.018417e-05 0.035 0.965
B0710 -0.0061948836 -0.0108430396 -1.870104e-03 0.037 0.963
B0310 -0.0028164249 -0.0048520436 -8.904523e-04 0.043 0.957
B1029 -0.0003677198 -0.0007876956 -4.280235e-05 0.045 0.955
D0604 -0.0252076749 -0.0512963980 -5.101516e-03 0.050 0.950
D0903 -0.0047866601 -0.0089646580 -1.108197e-03 0.050 0.950
B0218 -0.0116625901 -0.0223161257 -2.061981e-03 0.053 0.947
B0703 -0.0192156802 -0.0373477215 -2.861509e-03 0.059 0.941
D0206 -0.0197688096 -0.0392643553 -3.454538e-03 0.061 0.939
D0701 -0.0051397234 -0.0101283806 -6.650890e-04 0.068 0.932
D0509 -0.0151983776 -0.0301644940 -2.157025e-03 0.070 0.930
D0103 -0.0058062776 -0.0111387262 -5.611137e-04 0.073 0.927
B0903 -0.0040583726 -0.0082299820 -3.182127e-04 0.078 0.922
B0711 -0.0116387135 -0.0239457783 -1.087371e-03 0.079 0.921
D0307 -0.0106690266 -0.0219998777 -7.160822e-04 0.083 0.917
B0702 -0.0164229499 -0.0340972191 -1.406339e-03 0.085 0.915
D0508 -0.0025923862 -0.0054092788 -1.884571e-04 0.089 0.911
D0908 -0.0351466114 -0.0789647021 -1.049356e-03 0.091 0.909
D0912 -0.0009892400 -0.0020007917 -1.425600e-05 0.092 0.908
B0303 -0.0072114512 -0.0152002291 -7.612867e-05 0.097 0.903
#Positively associated
positive <- element_predictions %>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)

#Negatively associated
negative <- element_predictions %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

all_elements <- bind_rows(positive,negative) %>%
  left_join(GIFT_db,by=join_by(trait==Code_element)) %>%
  mutate(trait=factor(trait,levels=c(rev(positive$trait),rev(negative$trait)))) %>%
  mutate(Code_function=factor(Code_function)) %>%
  mutate(element_legend=str_c(trait," - ",Element)) %>%
  mutate(function_legend=str_c(Code_function," - ",Function)) %>%
  select(trait,mean,p1,p9,element_legend,function_legend) %>% 
  unique()

gift_colors <- read_tsv("data/gift_colors.tsv") %>% 
  mutate(legend=str_c(Code_function," - ",Function))  %>% 
  filter(legend %in% all_elements$function_legend)

all_elements %>%
  ggplot(aes(x=mean, y=fct_reorder(element_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      xlim(c(-0.15,0.15)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Functional trait")

community_elements %>%
    bind_rows() %>%
    pivot_longer(-elevation, names_to = "trait", values_to = "value") %>%
    filter(trait %in% positive$trait) %>%
    mutate(trait=factor(trait, levels=positive$trait)) %>%
    mutate(elevation=as.numeric(elevation)) %>%
    ggplot(aes(x=elevation, y=value)) +
          geom_smooth(method = lm, formula = y ~ x, se = TRUE) +
          #geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), se = TRUE) +
          facet_wrap(~trait, ncol=5, scales="free") +
          theme_minimal() +
          labs(x="Elevation",y="Metabolic Capacity Index")

9.5.1.1 GIFT test visualization

# Aggregate bundle-level GIFTs into the compound level
genome_counts_filt_filt <- tibble::rownames_to_column(genome_counts_filt_filt, var = "genome")
GIFTs_elements_filtered <- elements_table[rownames(elements_table) %in% genome_counts_filt_filt$genome, ]
GIFTs_elements_filtered <- as.data.frame(GIFTs_elements_filtered) %>%
  select_if(~ !is.numeric(.) || sum(.) != 0)

# Get community-weighed average GIFTs per sample
GIFTs_elements_community <- to.community(GIFTs_elements_filtered, genome_counts_filt_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db)

GIFTs_elements_community %>%
    as.data.frame() %>%
    rownames_to_column(var="sample") %>%
    pivot_longer(!sample,names_to="trait",values_to="gift") %>%
    left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
    mutate(functionid = substr(trait, 1, 3)) %>%
    mutate(trait = case_when(
      trait %in% GIFT_db$Code_element ~ GIFT_db$Element[match(trait, GIFT_db$Code_element)],
      TRUE ~ trait
    )) %>%
    mutate(functionid = case_when(
      functionid %in% GIFT_db$Code_function ~ GIFT_db$Function[match(functionid, GIFT_db$Code_function)],
      TRUE ~ functionid
    )) %>%
    mutate(trait=factor(trait,levels=unique(GIFT_db$Element))) %>%
    mutate(functionid=factor(functionid,levels=unique(GIFT_db$Function))) %>%
    ggplot(aes(x=sample,y=trait,fill=gift)) +
        geom_tile(colour="white", linewidth=0.2)+
        scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
        facet_grid(functionid ~ Elevation, scales="free",space="free") +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
              strip.text.y = element_text(angle = 0)) +
        labs(y="Traits",x="Samples",fill="GIFT")

9.5.2 Functional level

functions_table <- elements_table %>%
    to.functions(., GIFT_db) %>%
    as.data.frame()

community_functions <- pred_elevation %>%
  group_by(elevation, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "elevation") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(functions_table,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="elevation")
   })
#max-min option
calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

function_predictions <- map_dfc(community_functions, function(mat) {
      mat %>%
        column_to_rownames(var = "elevation") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_functions[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)
# Positively associated
p2<-function_predictions %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  tt() |>
      style_tt(
        i = which(function_predictions$positive_support < 0.9 & function_predictions$negative_support < 0.1),
        background = "#E5D5B1") |>
      style_tt(
        i = which(function_predictions$negative_support > 0.9 & function_predictions$positive_support < 0.1),
        background = "#B7BCCE")

# Negatively associated (there isn't)
#function_predictions %>%
  #filter(mean <0) %>%
  #arrange(-negative_support) %>%
  #filter(negative_support>=0.9) %>%
  #tt() |>
      #style_tt(
        #i = which(function_predictions$positive_support < 0.9 & function_predictions$negative_support < 0.1),
        #background = "#E5D5B1") |>
      #style_tt(
        #i = which(function_predictions$negative_support > 0.9 & function_predictions$positive_support < 0.1),
        #background = "#B7BCCE")
# Positively associated
p2 %>%
  tt()
tinytable_senewvs8yr60t1hvmtjk
trait mean p1 p9 positive_support negative_support
S03 0.026906176 0.0104593848 0.044524683 0.973 0.027
B04 0.018264988 0.0051591436 0.033137776 0.970 0.030
D03 0.010497822 0.0021459296 0.019986199 0.943 0.057
D06 0.003285998 0.0004532722 0.006338923 0.933 0.067
B07 0.012228797 0.0004053295 0.024691062 0.906 0.094
#Negatively associated (there isn't)
all_functions <- function_predictions %>%
  left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
  mutate(trait=factor(trait)) %>%
  mutate(function_legend=str_c(trait," - ",Function)) %>%
  select(trait,mean,p1,p9,function_legend) %>% 
  unique()

gift_colors <- read_tsv("data/gift_colors.tsv") %>% 
  mutate(legend=str_c(Code_function," - ",Function))  %>% 
  filter(legend %in% all_functions$function_legend)

all_functions %>%
  ggplot(aes(x=mean, y=fct_reorder(function_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      xlim(c(-0.05,0.06)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Functional trait") +
      guides(col = guide_legend(ncol = 1))

community_functions %>%
    bind_rows() %>%
    pivot_longer(-elevation, names_to = "trait", values_to = "value") %>%
  filter(trait %in% function_predictions$trait) %>%
  mutate(trait=factor(trait, levels=function_predictions$trait)) %>%
    mutate(elevation=as.numeric(elevation)) %>%
    ggplot(aes(x=elevation, y=value)) +
          geom_smooth(method = lm, formula = y ~ x, se = TRUE) +
          #geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), se = TRUE) +
          facet_wrap(~trait, ncol=5, scales="free") +
          theme_minimal() +
          labs(x="Elevation",y="Metabolic Capacity Index")

9.5.2.1 GIFT test visualization

# Aggregate element-level GIFTs into the function level
GIFTs_functions <- to.functions(GIFTs_elements_filtered, GIFT_db)
functions <- GIFTs_functions %>%
  as.data.frame()

# Get community-weighed average GIFTs per sample
GIFTs_functions_community <- to.community(GIFTs_functions, genome_counts_filt_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db)

GIFTs_functions_community %>%
    as.data.frame() %>%
    rownames_to_column(var="sample") %>%
    pivot_longer(!sample,names_to="trait",values_to="gift") %>%
    left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
    ggplot(aes(x=trait,y=sample,fill=gift)) +
        geom_tile(colour="white", size=0.2)+
        scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
        facet_grid(Elevation ~ ., scales="free",space="free")

9.5.3 Domain level

domains_table <- functions_table %>%
    to.domains(., GIFT_db) %>%
    as.data.frame()

community_domains <- pred_elevation %>%
  group_by(elevation, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "elevation") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(domains_table,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="elevation")
   })
#max-min option
calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

domain_predictions <- map_dfc(community_domains, function(mat) {
      mat %>%
        column_to_rownames(var = "elevation") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_domains[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)
# Positively associated
p3<-positive_filtered<-domain_predictions %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) 

if (nrow(positive_filtered) > 0) {
  positive_filtered %>%
  tt() |>
      style_tt(
        i = which(domain_predictions$positive_support < 0.9 & domain_predictions$negative_support < 0.1),
        background = "#E5D5B1") |>
      style_tt(
        i = which(domain_predictions$negative_support > 0.9 & domain_predictions$positive_support < 0.1),
        background = "#B7BCCE")
}

# Negatively associated (there isn't)
#domain_predictions %>%
  #filter(mean <0) %>%
  #arrange(-negative_support) %>%
  #filter(negative_support>=0.9) %>%
  #tt() |>
      #style_tt(
        #i = which(domain_predictions$positive_support < 0.9 & domain_predictions$negative_support < 0.1),
        #background = "#E5D5B1") |>
      #style_tt(
        #i = which(domain_predictions$negative_support > 0.9 & domain_predictions$positive_support < 0.1),
        #background = "#B7BCCE")
# Positively associated
p3 %>%
  tt()
tinytable_6cnhubh0uxh10xm7v2h9
trait mean p1 p9 positive_support negative_support
Overall 0.006863701 0.0003939157 0.01509651 0.917 0.083
# Negatively associated (there isn't)
all_domains <- domain_predictions %>%
  left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
  mutate(trait=factor(trait)) %>%
  mutate(function_legend=str_c(trait," - ",Function)) %>%
  select(trait,mean,p1,p9) %>% 
  unique()

all_domains %>%
  ggplot(aes(x=mean, y=fct_reorder(trait, mean), xmin=p1, xmax=p9, color=trait)) +
      geom_point() +
      geom_errorbar() +
      xlim(c(-0.02,0.03)) +
      geom_vline(xintercept=0) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Domain level") +
      guides(col = guide_legend(ncol = 1))

community_domains %>%
    bind_rows() %>%
    pivot_longer(-elevation, names_to = "trait", values_to = "value") %>%
  filter(trait %in% domain_predictions$trait) %>%
  mutate(trait=factor(trait, levels=domain_predictions$trait)) %>%
    mutate(elevation=as.numeric(elevation)) %>%
    ggplot(aes(x=elevation, y=value)) +
          geom_smooth(method = lm, formula = y ~ x, se = TRUE) +
          #geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), se = TRUE) +
          facet_wrap(~trait, ncol=5, scales="free") +
          theme_minimal() +
          labs(x="Elevation",y="Metabolic Capacity Index")

9.5.3.1 GIFT test visualization

# Aggregate function-level GIFTs into overall Biosynthesis, Degradation and Structural GIFTs
GIFTs_domains <- to.domains(GIFTs_functions, GIFT_db)
domains <- GIFTs_domains %>%
  as.data.frame()

# Get community-weighed average GIFTs per sample
GIFTs_domains_community <- to.community(GIFTs_domains, genome_counts_filt_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db)

GIFTs_domains_community %>%
    as.data.frame() %>%
    rownames_to_column(var="sample") %>%
    pivot_longer(!sample,names_to="trait",values_to="gift") %>%
    left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
    ggplot(aes(x=trait,y=sample,fill=gift)) +
        geom_tile(colour="white", size=0.2)+
        scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
        facet_grid(Elevation ~ ., scales="free",space="free")